* Replication script, written for Stata 14.2.
* Requires the user-written packages coefplot, estout, and grc1leg.
* Before running, edit the -cd- line below so that it points to the directory
* that holds Newman.dta; output is saved relative to that directory.
cd [change to directory where Newman.dta resides and output will be saved]
tempfile stats_c stats_g

* Keep observations with permission==1 and non-missing pred and role.
use Newman.dta if permission==1 & !missing(pred, role), clear

* cleancluster flags semester_x cells in which no role occurs more than once:
* count observations per (semester_x, role), take the largest such count within
* each semester_x, and mark the cell as clean when that maximum equals 1.
* NOTE(review): semester_x is presumably an abbreviation of semester_x_group,
* the cluster variable used in the regressions further down -- confirm.
bysort semester_x role: generate size_role = _N
bysort semester_x: egen size = max(size_role)
generate cleancluster = (size == 1)
drop size*

* System-missing debias is set to 0.
replace debias = 0 if debias == .

* 0/100 versions of the gender indicator and of the outcome.
generate female = 100 * (gender == 2)
generate prediction100 = 100 * prediction

* figure 1: visualizing the data
* Three horizontal bar panels, each split by debias within role within semester,
* are built without being drawn and then combined into one row by grc1leg.
* Panels b and c suppress the category labels on all three over() levels.
local unlabeled_groups over(debias, label(nolabel)) over(role, label(nolabel)) over(semester, label(nolabel))

* panel a: count of non-missing prediction per cell
graph hbar (count) prediction, ///
	over(debias) over(role) over(semester) ///
	bar(2, color(ltblue)) ///
	legend(order(2) label(2 "debiasing sub-condition") symysize(*.3)) ///
	title("{it:N}") ytitle("") ///
	scheme(s1color) fxsize(100) name(figure1a, replace) nodraw

* panel b: bar height is the summary of the 0/100 female indicator per cell
graph hbar female, `unlabeled_groups' ///
	bar(1, color(pink)) bar(2, color(ltblue)) ///
	title("% Female", span) ytitle("") ///
	scheme(s1color) fxsize(50) name(figure1b, replace) nodraw

* panel c: bar height is the summary of the 0/100 prediction variable per cell
graph hbar prediction100, `unlabeled_groups' ///
	bar(1, color(black)) bar(2, color(ltblue)) ///
	title("% Predicting Pet. Win", span) ytitle("") ///
	scheme(s1color) fxsize(50) name(figure1c, replace) nodraw

* combine the three panels side by side and save the result
grc1leg figure1a figure1b figure1c, ///
	rows(1) ycommon imargin(0 0 0) scheme(s1color) ///
	title("Figure 1: Sample and Outcomes, by Condition") ///
	saving(figure1.gph, replace)

* randomization tests
* Exact test of role against gender, run separately by semester and once for
* the whole sample (total); the collected p-values are exported as CSV.
* preserve/restore protects the analysis data, which statsby replaces.
preserve
	statsby p = r(p_exact), by(semester) total clear: tabulate role gender, exact
	export delimited using randomizationchecks.csv, replace
restore

* figure 2: main effects (univariate)
* role2 is the binary exposure passed to -cs-: role 1 stays 1, role 3 becomes 0,
* and role 2 is set to missing so that it drops out of the comparison.
recode role (2 = .) (3=0), gen(role2)

* Step 1: statistics by gender (plus a total row), stored in tempfile stats_g.
* The first statsby saves the observation counts, the second replaces the data
* in memory with the -cs- results; the merge puts both in one row per group.
preserve
statsby, by(female) total saving(`stats_g'): count if role!=2
statsby, by(female) total clear: cs prediction role2, exact
merge 1:1 female using `stats_g', nogenerate
save `stats_g', replace

* Step 2: the same statistics by experimental condition (plus a total row).
restore, preserve
statsby, by(testcondition) total saving(`stats_c'): count if role!=2
statsby, by(testcondition) total clear: cs prediction role2, exact
merge 1:1 testcondition using `stats_c', nogenerate

* The total row has testcondition missing; give it the plotting position -2.
recode testcondition (.=-2)
* Values 0 and 5 carry no data: they are heading labels printed on the axis.
label define testcondition -2 "ALL" 0 "BY EXPERIMENTAL CONDITION" 5 "BY GENDER" 6 `"Male or "rather not say""' 7 "Female", add
append using `stats_g'

* The other outputs go to the working directory; create the sub-directory used
* here if it is not there yet (capture ignores the error when it already exists).
capture mkdir graphs_tables
save graphs_tables/univariate_test_statistics.dta, replace // not reported as such but good for reading off estimates and confidence intervals reported in text

* Give the appended gender rows their plotting positions (female is still 0/100 here).
replace testcondition = 6 if female==0 & mi(testcondition)
replace testcondition = 7 if female==100 & mi(testcondition)

* Marker label: exact p-value and number of observations.
gen p = "{it:p}=" + strofreal(p_exact, "%4.3f") + " (n=" + strofreal(N,"%3.0f") + ")"
twoway (rspike ub_rd lb_rd testcondition, horizontal) (dot rd testcondition, horizontal dotextend(no) ndots(0) mlabel(p) mlabcolor(black) mlabposition(1)), ///
	xline(0, lcolor(black) lpattern(dash)) yscale(reverse range(-2.5/7.5)) ylabel(-2 0 1 2 3 5 6 7, valuelabel noticks angle(horizontal) nogrid) ///
	ytitle("") xtitle("Fraction petitioners minus fraction respondents" "predicting petitioner win") ///
	title("Figure 2: Main Treatment Effect (Univariate)", span) ///
	note("Spikes indicate asymptotic 95% confidence intervals." "{it:p}-values are from two-sided Fisher exact tests.") ///
	legend(off) scheme(s1color) aspectratio(.75) saving(figure_2.gph, replace)
restore

* figure 3: treatment effects and interactions (multivariate)
estimates clear
* Turn the 0/100 indicator into 0/1 so it can enter the models as 1.female.
recode female (100=1)
* Common part of all six specifications; the trailing blank lets further terms
* be appended, and prefixing "xt" turns -reg- into -xtreg-.
local regression "reg prediction ib3.role##i.debias ib3.role##1.semester "
* The six stored results are referred to as est1-est6 in the coefplot call below.
* NOTE(review): the two -xtreg, fe- calls need a panel variable; no -xtset-
* appears in this script, so it presumably comes saved with Newman.dta -- confirm.
* The gender interaction uses 1.female (gender==2) so that it matches the
* "Female" label in the figure; 1.gender would select gender==1 instead.
_eststo: `regression' i.semester, robust
_eststo: `regression' i.semester if cleancluster, cluster(semester_x_group)
_eststo: `regression' i.semester, cluster(semester_x_group)
_eststo: xt`regression', fe cluster(semester_x_group)
_eststo: `regression' i.semester ib3.role##1.female, cluster(semester_x_group)
_eststo: xt`regression' ib3.role##1.female, fe cluster(semester_x_group)
esttab using table_fig3.csv, star(+ 0.10 * 0.05 ** 0.01 *** .001) drop(?.semester) nobaselevels label /// this table is not in paper but helpful to read off coefficients and s.e.'s
	b(2) ci(2) r2(2) replace

coefplot (est1, label(no gender controls, semester FE, no clustering)) (est2, label(+ clustering by group (clean only))) (est3, label(+ using all groups)) ///
	(est4, label(+ group FE)) (est5, label(gender controls, semester FE, clustering by group (all)))(est6, label(+ group FE)), ///
	xline(0, lcolor(black) lpattern(dash)) ///
	keep(1.role 1.role#1.debias 1.role#1.semester 1.role#1.female)  ///
	heading(1.role#1.debias = `""{bf:Interactions:}" "(Petitioner - Respondent)""', nogap) ///
	coeflabel(1.role = `""{bf:Main effect:}" "Petitioner - Respondent ({it:{&beta}{subscript:P}})""' ///
		1.role#1.debias = "{c 215} Debiasing ({it:{&delta}{subscript:D,P}})" ///
		1.role#1.semester = "{c 215} Pre-class  ({it:{&delta}{subscript:PC,P}})" ///
		1.role#1.female = "{c 215} Female ({it:{&delta}{subscript:F,P}})" ///
		, notick) ///
	legend(col(2) holes(5 6) span rowgap(.5) size(vsmall)) ///
	graphregion(margin(l=15)) scheme(s1color) ///
	title("Figure 3: Treatment Effects and Interactions (Multivariate)", span) ///
	subtitle("OLS estimates and 95% c.i. of equation (1)", span) ///
	saving(figure3.gph, replace) 

* confidence by gender
* Two-group mean comparison of certainty across gender, leaving out gender==3
* (presumably the "rather not say" category -- confirm against the codebook).
ttest certainty if !(gender == 3), by(gender)
